Metadata

Close
Metadata
% Bibliographic record for "An Investigation of 2D Keypoints Detection on
% Challenging Scenarios Using Depthwise Separable Convolutions" (2021).
% The targetfile field names a supplementary-material PDF.
%
% Notes for maintainers:
%  - "2D" in the title is wrapped in braces so that styles which apply
%    sentence casing to titles do not lowercase it to "2d".
%  - targetfile, affiliation, ibi and urlaccessdate are not standard
%    fields; standard styles silently ignore unknown fields. They appear
%    to come from the exporting repository -- TODO confirm before renaming.
%  - NOTE(review): the date value is free text, not ISO 8601, so a
%    biblatex/Biber toolchain would not parse it as a date; it is left as
%    exported. Presumably it is the event date range -- confirm, and
%    confirm which toolchain consumes this file, before normalising.
@Misc{CostaFigTeiLimTei:2021:HaPoEs,
               author = "Costa, Willams and Figueiredo, Lucas and Teixeira, Jo{\~a}o 
                         Marcelo and Lima, Jo{\~a}o Paulo and Teichrieb, Veronica",
                title = "An Investigation of {2D} Keypoints Detection on Challenging 
                         Scenarios Using Depthwise Separable Convolutions: A Hand Pose 
                         Estimation Case Study",
                 year = "2021",
                 date = "18-22 Oct. 2021",
             keywords = "real-time hand pose estimation, human-computer interaction, 
                         depthwise separable convolutions",
           targetfile = "Hand3d_supplementary_material.pdf",
             abstract = "2D keypoints detection is a computer vision task applicable to 
                         several fields such as hand, face, and body tracking, which 
                         provides useful information for spatial analytics, gestural 
                         interactions, and augmented reality applications. This work 
                         investigates the usage of depthwise separable convolutions (an 
                         optimized convolution operation) to speed up the inference time on 
                         a largely used architecture for 2D keypoints estimation. We 
                         evaluate the impacts on the precision and performance of such 
                         optimization on a hand pose estimation task. We also extend the 
                         evaluation towards simulated challenging scenarios of defocused 
                         lens, motion blur, occlusions, and noisy images to understand how 
                         these stress situations affect both the original and the optimized 
                         architectures. We show that the execution time can be improved on 
                         average by 12.8\% with an accuracy compromise of less than 1 
                         pixel (mean EPE). The experiments on challenging scenarios 
                         revealed that the model powered by depthwise separable 
                         convolutions is most fit for the occlusion cases and noisy 
                         environments while suffering more on the motion blur simulated 
                         scenarios.",
          affiliation = "Voxar Labs, Centro de Inform{\'a}tica, Universidade Federal de 
                         Pernambuco and Voxar Labs, Centro de Inform{\'a}tica, 
                         Universidade Federal de Pernambuco and Voxar Labs, Centro de 
                         Inform{\'a}tica, Universidade Federal de Pernambuco and 
                         Departamento de Computa{\c{c}}{\~a}o, Universidade Federal Rural 
                         de Pernambuco and Voxar Labs, Centro de Inform{\'a}tica, 
                         Universidade Federal de Pernambuco",
             language = "en",
                  ibi = "8JMKD3MGPEW34M/45CU7H5",
                  url = "http://urlib.net/ibi/8JMKD3MGPEW34M/45CU7H5",
        urlaccessdate = "2024, May 19"
}